suppressPackageStartupMessages(library(tidyverse))
library(survival)
library(survminer)
## Loading required package: ggpubr
## 
## Attaching package: 'survminer'
## The following object is masked from 'package:survival':
## 
##     myeloma
setwd("~/Google Drive/My Drive/Analysis/METTL2A/")

devtools::load_all('~/Google Drive/My Drive/Scripts/R_packages/myUtilities/')
## ℹ Loading myUtilities
# Directory to save figures 
fig_expression <- 'Figures/TCGA_GTEx_TPM/Expression/'
fig_survival   <- 'Figures/TCGA_GTEx_TPM/Survival/'
tabledir <- 'Tables/TCGA_GTEx_TPM/'

Sys.setenv("VROOM_CONNECTION_SIZE" = 1e+06)

determine_sample_type <- function(df) {
  
  df |>
    mutate(
      sample_type = case_when(
        grepl('Tumor|Primary|Cancer|Metastatic' , `_sample_type`) ~ 'Tumor',
        grepl('Normal', `_sample_type`) ~ 'Normal',
        .default = NA
      )
    )
  
}

fill_primary_site_info <- function(df) {
  
  df |>
    mutate(
      primary_site = ifelse(
        !is.na(primary_site),
        yes = primary_site,
        no = str_extract(detailed_category, '^([A-Za-z]+)')
      )
    ) |>
    mutate(
      primary_site = str_remove(primary_site, '\xca')
    )
  
}

convert_primarysite_name <- function(df) {
  
  df |>
    mutate(
      primary_site_modified = str_replace_all(
        primary_site, 'Adrenal gland', 'Adrenal Gland'
      )
    ) |>
    mutate(
      primary_site_modified = str_replace_all(
        primary_site_modified, 'Blood Vessel|White blood cell', 'Blood'
      )
    )
  
  
}

plot_KM <- function(.genename, df, category) {
  
  filtered_df <- df |>
    dplyr::filter(gene_name == .genename) |>
    dplyr::filter(primary_site_modified == category) |>
    mutate(
      group = ifelse(TPM > median(TPM), 'high', 'low')
    )
  
  genename_fit <- surv_fit(
    Surv(OS.time, OS) ~ group,
    data = filtered_df
  )
  print(.genename)
  print(summary(genename_fit)$table)
  
  survivalplot <-
    ggsurvplot(
      genename_fit, pval = TRUE, pval.size = 3,
      title = paste0(.genename, '\n', category),
      palette = c('red', 'blue'),
      ggtheme = theme_survminer(base_size = 8),
      legend = 'bottom', legend.title = '', 
      censor.size = .1
    )
  survivalplot[[1]] |>
    ggsave_multiple_formats(
      outdir = fig_survival,
      basename = paste0(.genename, '_', category),
      width = 2.6, height = 4.2, fontsize = 7)
  
}

Read data

Sample information

TCGAtargetGTEx_sampleinfo <-
  read_TCGAtargetGTEx_sampleinfo() |>
  determine_sample_type() |>
  rename(primary_site = `_primary_site`) |>
  fill_primary_site_info() |>
  convert_primarysite_name()
TCGAtargetGTEx_sampleinfo
## # A tibble: 19,131 × 9
##    sample   detailed_category primary disease or t…¹ primary_site `_sample_type`
##    <chr>    <chr>             <chr>                  <chr>        <chr>         
##  1 TCGA-V4… Uveal Melanoma    Uveal Melanoma         Eye          Primary Tumor 
##  2 TCGA-VD… Uveal Melanoma    Uveal Melanoma         Eye          Primary Tumor 
##  3 TCGA-V4… Uveal Melanoma    Uveal Melanoma         Eye          Primary Tumor 
##  4 TCGA-VD… Uveal Melanoma    Uveal Melanoma         Eye          Primary Tumor 
##  5 TCGA-WC… Uveal Melanoma    Uveal Melanoma         Eye          Primary Tumor 
##  6 TCGA-WC… Uveal Melanoma    Uveal Melanoma         Eye          Primary Tumor 
##  7 TCGA-WC… Uveal Melanoma    Uveal Melanoma         Eye          Primary Tumor 
##  8 TCGA-YZ… Uveal Melanoma    Uveal Melanoma         Eye          Primary Tumor 
##  9 TCGA-V4… Uveal Melanoma    Uveal Melanoma         Eye          Primary Tumor 
## 10 TCGA-WC… Uveal Melanoma    Uveal Melanoma         Eye          Primary Tumor 
## # ℹ 19,121 more rows
## # ℹ abbreviated name: ¹​`primary disease or tissue`
## # ℹ 4 more variables: `_gender` <chr>, `_study` <chr>, sample_type <chr>,
## #   primary_site_modified <chr>
TCGAtargetGTEx_sampleinfo |>
  filter(primary_site == 'Uterus')
## # A tibble: 135 × 9
##    sample   detailed_category primary disease or t…¹ primary_site `_sample_type`
##    <chr>    <chr>             <chr>                  <chr>        <chr>         
##  1 TCGA-ND… Uterine Carcinos… Uterine Carcinosarcoma Uterus       Primary Tumor 
##  2 TCGA-NF… Uterine Carcinos… Uterine Carcinosarcoma Uterus       Primary Tumor 
##  3 TCGA-N8… Uterine Carcinos… Uterine Carcinosarcoma Uterus       Primary Tumor 
##  4 TCGA-N7… Uterine Carcinos… Uterine Carcinosarcoma Uterus       Primary Tumor 
##  5 TCGA-N6… Uterine Carcinos… Uterine Carcinosarcoma Uterus       Primary Tumor 
##  6 TCGA-N6… Uterine Carcinos… Uterine Carcinosarcoma Uterus       Primary Tumor 
##  7 TCGA-ND… Uterine Carcinos… Uterine Carcinosarcoma Uterus       Primary Tumor 
##  8 TCGA-N8… Uterine Carcinos… Uterine Carcinosarcoma Uterus       Primary Tumor 
##  9 TCGA-N6… Uterine Carcinos… Uterine Carcinosarcoma Uterus       Primary Tumor 
## 10 TCGA-NA… Uterine Carcinos… Uterine Carcinosarcoma Uterus       Primary Tumor 
## # ℹ 125 more rows
## # ℹ abbreviated name: ¹​`primary disease or tissue`
## # ℹ 4 more variables: `_gender` <chr>, `_study` <chr>, sample_type <chr>,
## #   primary_site_modified <chr>
TCGAtargetGTEx_sampleinfo |>
  filter(grepl('Uterine', detailed_category))
## # A tibble: 261 × 9
##    sample   detailed_category primary disease or t…¹ primary_site `_sample_type`
##    <chr>    <chr>             <chr>                  <chr>        <chr>         
##  1 TCGA-AJ… Uterine Corpus E… Uterine Corpus Endome… Endometrium  Solid Tissue …
##  2 TCGA-BG… Uterine Corpus E… Uterine Corpus Endome… Endometrium  Solid Tissue …
##  3 TCGA-AX… Uterine Corpus E… Uterine Corpus Endome… Endometrium  Solid Tissue …
##  4 TCGA-DI… Uterine Corpus E… Uterine Corpus Endome… Endometrium  Solid Tissue …
##  5 TCGA-AJ… Uterine Corpus E… Uterine Corpus Endome… Endometrium  Solid Tissue …
##  6 TCGA-BG… Uterine Corpus E… Uterine Corpus Endome… Endometrium  Solid Tissue …
##  7 TCGA-BG… Uterine Corpus E… Uterine Corpus Endome… Endometrium  Solid Tissue …
##  8 TCGA-AX… Uterine Corpus E… Uterine Corpus Endome… Endometrium  Solid Tissue …
##  9 TCGA-AX… Uterine Corpus E… Uterine Corpus Endome… Endometrium  Solid Tissue …
## 10 TCGA-AX… Uterine Corpus E… Uterine Corpus Endome… Endometrium  Solid Tissue …
## # ℹ 251 more rows
## # ℹ abbreviated name: ¹​`primary disease or tissue`
## # ℹ 4 more variables: `_gender` <chr>, `_study` <chr>, sample_type <chr>,
## #   primary_site_modified <chr>

Read survival data

TCGA_survival <-
  read_TCGA_survival()
TCGA_survival
## # A tibble: 10,496 × 9
##    sample             OS OS.time   DSS DSS.time   DFI DFI.time   PFI PFI.time
##    <chr>           <dbl>   <dbl> <dbl>    <dbl> <dbl>    <dbl> <dbl>    <dbl>
##  1 TCGA-OR-A5J1-01     1    1355     1     1355     1      754     1      754
##  2 TCGA-OR-A5J2-01     1    1677     1     1677    NA       NA     1      289
##  3 TCGA-OR-A5J3-01     0    2091     0     2091     1       53     1       53
##  4 TCGA-OR-A5J5-01     1     365     1      365    NA       NA     1       50
##  5 TCGA-OR-A5J6-01     0    2703     0     2703     0     2703     0     2703
##  6 TCGA-OR-A5J7-01     1     490     1      490    NA       NA     1      162
##  7 TCGA-OR-A5J8-01     1     579     1      579     1      530     1      530
##  8 TCGA-OR-A5J9-01     0    1352     0     1352     1      414     1      414
##  9 TCGA-OR-A5JA-01     1     922     1      922    NA       NA     1      922
## 10 TCGA-OR-A5JB-01     1     551    NA      551    NA       NA     0      551
## # ℹ 10,486 more rows

Read m3C writer expression

TCGAtargetGTEx_gene_TPMs <-
  read_TCGAtargetGTEx_rsem_gene_TPM()
TCGAtargetGTEx_gene_TPMs
## # A tibble: 60,498 × 19,133
##    gene_id  gene_name GTEX-S4Q7-0003-SM-3N…¹ `TCGA-19-1787-01` `TCGA-S9-A7J2-01`
##    <chr>    <chr>                      <dbl>             <dbl>             <dbl>
##  1 ENSG000… RP11-368…                  -3.46            -9.97              0.300
##  2 ENSG000… RP11-167…                  -9.97            -9.97             -9.97 
##  3 ENSG000… RP11-742…                  -3.63            -3.82             -3.05 
##  4 ENSG000… RAB4B                       4.60             5.30              4.89 
##  5 ENSG000… AC104071…                  -9.97            -9.97             -9.97 
##  6 ENSG000… TIGAR                       2.26             3.51              2.30 
##  7 ENSG000… LINC01224                  -6.51             0.865            -1.03 
##  8 ENSG000… MIR4802                    -9.97            -9.97             -9.97 
##  9 ENSG000… RNF44                       5.78             4.25              5.38 
## 10 ENSG000… DNAH3                      -4.29            -5.01             -9.97 
## # ℹ 60,488 more rows
## # ℹ abbreviated name: ¹​`GTEX-S4Q7-0003-SM-3NM8M`
## # ℹ 19,128 more variables: `GTEX-QV31-1626-SM-2S1QC` <dbl>,
## #   `TCGA-G3-A3CH-11` <dbl>, `TCGA-B5-A5OE-01` <dbl>,
## #   `GTEX-13QIC-0011-R1a-SM-5O9CJ` <dbl>, `TCGA-B2-5641-11` <dbl>,
## #   `GTEX-ZPCL-0126-SM-4WWC8` <dbl>, `TARGET-20-PANGDN-09` <dbl>,
## #   `GTEX-S33H-1226-SM-4AD69` <dbl>, `GTEX-X88G-0426-SM-47JZ5` <dbl>, …
# m3C_writers_TCGAtargetGTEx_norm_genecounts <-
#   TCGAtargetGTEx_norm_genecounts |>
#   filter(grepl('^METTL[2|6|8][AB]?$', gene_name)) |>
#   pivot_longer(
#     cols = -c(gene_id, gene_name),
#     names_to = 'sample', values_to = 'norm_count'
#   ) |>
#   left_join(TCGAtargetGTEx_sampleinfo) |>
#   filter(!is.na(sample_type))
# m3C_writers_TCGAtargetGTEx_norm_genecounts


m3C_writers_TCGAtargetGTEx_gene_TPMs <-
  TCGAtargetGTEx_gene_TPMs |>
  filter(grepl('^METTL[2|6|8][AB]?$', gene_name)) |>
  pivot_longer(
    cols = -c(gene_id, gene_name),
    names_to = 'sample', values_to = 'TPM'
  ) |>
  left_join(TCGAtargetGTEx_sampleinfo) |>
  filter(!is.na(sample_type))
## Joining with `by = join_by(sample)`
m3C_writers_TCGAtargetGTEx_gene_TPMs
## # A tibble: 74,788 × 12
##    gene_id       gene_name sample   TPM detailed_category primary disease or t…¹
##    <chr>         <chr>     <chr>  <dbl> <chr>             <chr>                 
##  1 ENSG00000123… METTL8    TCGA-… 3.25  Glioblastoma Mul… Glioblastoma Multifor…
##  2 ENSG00000123… METTL8    TCGA-… 2.52  Brain Lower Grad… Brain Lower Grade Gli…
##  3 ENSG00000123… METTL8    GTEX-… 3.09  Artery - Tibial   Artery - Tibial       
##  4 ENSG00000123… METTL8    TCGA-… 0.896 Liver Hepatocell… Liver Hepatocellular …
##  5 ENSG00000123… METTL8    TCGA-… 4.38  Uterine Corpus E… Uterine Corpus Endome…
##  6 ENSG00000123… METTL8    GTEX-… 2.06  Brain - Hippocam… Brain - Hippocampus   
##  7 ENSG00000123… METTL8    TCGA-… 2.32  Kidney Clear Cel… Kidney Clear Cell Car…
##  8 ENSG00000123… METTL8    GTEX-… 1.90  Thyroid           Thyroid               
##  9 ENSG00000123… METTL8    TARGE… 1.64  Acute Myeloid Le… Acute Myeloid Leukemia
## 10 ENSG00000123… METTL8    GTEX-… 0.178 Pancreas          Pancreas              
## # ℹ 74,778 more rows
## # ℹ abbreviated name: ¹​`primary disease or tissue`
## # ℹ 6 more variables: primary_site <chr>, `_sample_type` <chr>,
## #   `_gender` <chr>, `_study` <chr>, sample_type <chr>,
## #   primary_site_modified <chr>

Plot distribution

m3C_writers_TCGAtargetGTEx_gene_TPMs |>
  ggplot(aes(
    x = primary_site_modified, y = TPM,
    colour = sample_type, fill = sample_type)
  ) +
  geom_violin() +
  scale_y_log10() +
  coord_flip() +
  facet_wrap( ~ gene_name)
## Warning in transformation$transform(x): NaNs produced
## Warning in scale_y_log10(): log-10 transformation introduced infinite values.
## Warning: Removed 1429 rows containing non-finite outside the scale range
## (`stat_ydensity()`).

Calculate mean expression in normal tissues

median_TPM_in_normal <-
  m3C_writers_TCGAtargetGTEx_gene_TPMs |>
  filter(sample_type == 'Normal') |>
  group_by(gene_name, primary_site_modified) |>
  reframe(median_in_normal = median(TPM, na.rm = TRUE))
median_TPM_in_normal
## # A tibble: 152 × 3
##    gene_name primary_site_modified median_in_normal
##    <chr>     <chr>                            <dbl>
##  1 METTL2A   Adipose Tissue                    2.62
##  2 METTL2A   Adrenal Gland                     2.88
##  3 METTL2A   Bile duct                         1.84
##  4 METTL2A   Bladder                           2.92
##  5 METTL2A   Blood                             2.58
##  6 METTL2A   Brain                             2.25
##  7 METTL2A   Breast                            2.93
##  8 METTL2A   Cervix                            2.67
##  9 METTL2A   Cervix Uteri                      2.75
## 10 METTL2A   Colon                             2.83
## # ℹ 142 more rows
# mean_TPM_in_normal <-
#   m3C_writers_TCGAtargetGTEx_gene_TPMs |>
#   filter(sample_type == 'Normal') |>
#   group_by(gene_name, primary_site_modified) |>
#   reframe(mean_in_normal = mean(TPM, na.rm = TRUE))
# mean_TPM_in_normal

Calculate relative expression

m3C_writers_TCGAtargetGTEx_rel_expression_TPM <-
  m3C_writers_TCGAtargetGTEx_gene_TPMs |>
  left_join(median_TPM_in_normal) |>
  mutate(rel_expression = TPM - median_in_normal)
## Joining with `by = join_by(gene_name, primary_site_modified)`
m3C_writers_TCGAtargetGTEx_rel_expression_TPM
## # A tibble: 74,788 × 14
##    gene_id       gene_name sample   TPM detailed_category primary disease or t…¹
##    <chr>         <chr>     <chr>  <dbl> <chr>             <chr>                 
##  1 ENSG00000123… METTL8    TCGA-… 3.25  Glioblastoma Mul… Glioblastoma Multifor…
##  2 ENSG00000123… METTL8    TCGA-… 2.52  Brain Lower Grad… Brain Lower Grade Gli…
##  3 ENSG00000123… METTL8    GTEX-… 3.09  Artery - Tibial   Artery - Tibial       
##  4 ENSG00000123… METTL8    TCGA-… 0.896 Liver Hepatocell… Liver Hepatocellular …
##  5 ENSG00000123… METTL8    TCGA-… 4.38  Uterine Corpus E… Uterine Corpus Endome…
##  6 ENSG00000123… METTL8    GTEX-… 2.06  Brain - Hippocam… Brain - Hippocampus   
##  7 ENSG00000123… METTL8    TCGA-… 2.32  Kidney Clear Cel… Kidney Clear Cell Car…
##  8 ENSG00000123… METTL8    GTEX-… 1.90  Thyroid           Thyroid               
##  9 ENSG00000123… METTL8    TARGE… 1.64  Acute Myeloid Le… Acute Myeloid Leukemia
## 10 ENSG00000123… METTL8    GTEX-… 0.178 Pancreas          Pancreas              
## # ℹ 74,778 more rows
## # ℹ abbreviated name: ¹​`primary disease or tissue`
## # ℹ 8 more variables: primary_site <chr>, `_sample_type` <chr>,
## #   `_gender` <chr>, `_study` <chr>, sample_type <chr>,
## #   primary_site_modified <chr>, median_in_normal <dbl>, rel_expression <dbl>
# m3C_writers_TCGAtargetGTEx_rel_expression_TPM <-
#   m3C_writers_TCGAtargetGTEx_gene_TPMs |>
#   left_join(mean_TPM_in_normal) |>
#   mutate(rel_expression = TPM / mean_in_normal)
# m3C_writers_TCGAtargetGTEx_rel_expression_TPM

Plot heatmap

m3C_writers_TCGAtargetGTEx_rel_expression_TPM_summary <- 
  m3C_writers_TCGAtargetGTEx_rel_expression_TPM |> 
  filter(gene_name != 'ALKBH1') |> 
  filter(sample_type == 'Tumor') |>
  group_by(gene_name, sample_type, primary_site_modified) |> 
  reframe(
    n = n(),
    mean = mean(rel_expression),
    median = median(rel_expression)
  ) |> 
  arrange(-mean)
m3C_writers_TCGAtargetGTEx_rel_expression_TPM_summary |> 
  export_tsv(outdir = tabledir)
## 
## Exported to: Tables/TCGA_GTEx_TPM/m3C_writers_TCGAtargetGTEx_rel_expression_TPM_summary_2025-06-10.tsv
## # A tibble: 120 × 6
##    gene_name sample_type primary_site_modified     n  mean median
##    <chr>     <chr>       <chr>                 <int> <dbl>  <dbl>
##  1 METTL8    Tumor       Bile duct                36  2.22   2.20
##  2 METTL8    Tumor       Stomach                 414  1.93   1.90
##  3 METTL8    Tumor       Pancreas                179  1.87   1.90
##  4 METTL6    Tumor       Bile duct                36  1.85   1.87
##  5 METTL6    Tumor       Pancreas                179  1.66   1.71
##  6 METTL2A   Tumor       Bile duct                36  1.66   1.69
##  7 METTL2B   Tumor       Brain                   689  1.59   1.59
##  8 METTL2A   Tumor       Brain                   689  1.58   1.61
##  9 METTL2B   Tumor       Bile duct                36  1.52   1.60
## 10 METTL8    Tumor       Cervix                  306  1.43   1.49
## # ℹ 110 more rows
m3C_relexpression_in_tumor_heatmap <- 
  m3C_writers_TCGAtargetGTEx_rel_expression_TPM_summary |> 
  filter(!is.na(median)) |> 
  ggplot(aes(
    x = gene_name |> str_remove('METTL'), 
    y = reorder(primary_site_modified, mean), 
    fill = mean
  )) +
  geom_tile() +
  scale_fill_gradient2(
    low = 'blue', mid = 'gray90', high = 'red', midpoint = 0, 
    limits = c(-2.5, 2.5)
  ) +
  labs(x = '', y = '') +
  theme_minimal(base_size = 8) +
  theme(
    legend.position = 'bottom', 
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = .5))
m3C_relexpression_in_tumor_heatmap |> 
  ggsave_multiple_formats(
    outdir = fig_expression, 
    width = 4.5, height = 9, units = 'cm', fontsize = 7
  )

Plot distribution of relative expression

m3C_writers_TCGAtargetGTEx_rel_expression_TPM |>
  filter(gene_name != 'ALKBH1') |> 
  filter(sample_type == 'Tumor') |>
  filter(!is.na(rel_expression)) |>
  ggplot(aes(
    x = reorder(primary_site_modified, rel_expression),
    y = rel_expression)) +
  geom_violin() +
  geom_hline(yintercept = 1) +
  scale_y_log10(limits = c(0.4, 2.2), breaks = c(0.5, 0.8, 1, 1.25, 2)) +
  labs(x = '', y = 'relative expression (normal = 1)') +
  coord_flip() +
  facet_wrap( ~ gene_name)
## Warning in transformation$transform(x): NaNs produced
## Warning in scale_y_log10(limits = c(0.4, 2.2), breaks = c(0.5, 0.8, 1, 1.25, :
## log-10 transformation introduced infinite values.
## Warning: Removed 18771 rows containing non-finite outside the scale range
## (`stat_ydensity()`).

m3C_writers_TCGAtargetGTEx_rel_expression_TPM |>
  filter(gene_name != 'ALKBH1') |> 
  filter(sample_type == 'Tumor') |>
  ggplot(aes(
    x = reorder(primary_site_modified, rel_expression, median),
    y = rel_expression)) +
  geom_violin() +
  geom_hline(yintercept = 1) +
  scale_y_log10() +
  coord_flip() +
  facet_wrap( ~ gene_name)
## Warning in transformation$transform(x): NaNs produced
## Warning in scale_y_log10(): log-10 transformation introduced infinite values.
## Warning: Removed 12260 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Groups with fewer than two datapoints have been dropped.
## ℹ Set `drop = FALSE` to consider such groups for position adjustment purposes.

test

m3C_relexpression_in_tumor_pvalues <- 
  m3C_writers_TCGAtargetGTEx_rel_expression_TPM |> 
  filter(gene_name != 'ALKBH1') |> 
  right_join(
    m3C_writers_TCGAtargetGTEx_rel_expression_TPM_summary |> 
      select(primary_site_modified) |> 
      distinct()
  ) |> 
  filter(!is.na(rel_expression)) |> 
  group_by(gene_name, primary_site_modified) |>
  rstatix::wilcox_test(rel_expression ~ sample_type, ref.group = 'Normal') |> 
  left_join(m3C_writers_TCGAtargetGTEx_rel_expression_TPM_summary) |>
  filter(!is.na(mean))
## Joining with `by = join_by(primary_site_modified)`
## Joining with `by = join_by(gene_name, primary_site_modified)`
m3C_relexpression_in_tumor_pvalues 
## # A tibble: 104 × 13
##    gene_name primary_site_modified .y.       group1 group2    n1    n2 statistic
##    <chr>     <chr>                 <chr>     <chr>  <chr>  <int> <int>     <dbl>
##  1 METTL2A   Adrenal Gland         rel_expr… Normal Tumor    128    77     6404 
##  2 METTL2A   Bile duct             rel_expr… Normal Tumor      9    36        2 
##  3 METTL2A   Bladder               rel_expr… Normal Tumor     28   407     1936.
##  4 METTL2A   Blood                 rel_expr… Normal Tumor    943   595   269447 
##  5 METTL2A   Brain                 rel_expr… Normal Tumor   1157   689    16188.
##  6 METTL2A   Breast                rel_expr… Normal Tumor    292  1099    33264 
##  7 METTL2A   Cervix                rel_expr… Normal Tumor      3   306      108 
##  8 METTL2A   Colon                 rel_expr… Normal Tumor    349   290     8842.
##  9 METTL2A   Endometrium           rel_expr… Normal Tumor     23   181      691 
## 10 METTL2A   Esophagus             rel_expr… Normal Tumor    668   182    12188.
## # ℹ 94 more rows
## # ℹ 5 more variables: p <dbl>, sample_type <chr>, n <int>, mean <dbl>,
## #   median <dbl>
m3C_relexpression_in_tumor_pvalues |> 
  export_tsv(outdir = tabledir)
## 
## Exported to: Tables/TCGA_GTEx_TPM/m3C_relexpression_in_tumor_pvalues_2025-06-10.tsv
## # A tibble: 104 × 13
##    gene_name primary_site_modified .y.       group1 group2    n1    n2 statistic
##    <chr>     <chr>                 <chr>     <chr>  <chr>  <int> <int>     <dbl>
##  1 METTL2A   Adrenal Gland         rel_expr… Normal Tumor    128    77     6404 
##  2 METTL2A   Bile duct             rel_expr… Normal Tumor      9    36        2 
##  3 METTL2A   Bladder               rel_expr… Normal Tumor     28   407     1936.
##  4 METTL2A   Blood                 rel_expr… Normal Tumor    943   595   269447 
##  5 METTL2A   Brain                 rel_expr… Normal Tumor   1157   689    16188.
##  6 METTL2A   Breast                rel_expr… Normal Tumor    292  1099    33264 
##  7 METTL2A   Cervix                rel_expr… Normal Tumor      3   306      108 
##  8 METTL2A   Colon                 rel_expr… Normal Tumor    349   290     8842.
##  9 METTL2A   Endometrium           rel_expr… Normal Tumor     23   181      691 
## 10 METTL2A   Esophagus             rel_expr… Normal Tumor    668   182    12188.
## # ℹ 94 more rows
## # ℹ 5 more variables: p <dbl>, sample_type <chr>, n <int>, mean <dbl>,
## #   median <dbl>
m3C_writers_TCGAtargetGTEx_rel_expression_TPM_summary |> 
  arrange(median)
## # A tibble: 120 × 6
##    gene_name sample_type primary_site_modified     n   mean median
##    <chr>     <chr>       <chr>                 <int>  <dbl>  <dbl>
##  1 METTL6    Tumor       Testis                  154 -1.72  -1.70 
##  2 METTL6    Tumor       Blood                   595 -1.07  -1.13 
##  3 METTL8    Tumor       Thyroid Gland           512 -0.773 -0.718
##  4 METTL2A   Tumor       Blood                   595 -0.607 -0.711
##  5 METTL2B   Tumor       Thyroid Gland           512 -0.437 -0.374
##  6 METTL8    Tumor       Adrenal Gland            77 -0.430 -0.365
##  7 METTL2B   Tumor       Thymus                  119 -0.363 -0.292
##  8 METTL2A   Tumor       Adrenal Gland            77 -0.395 -0.289
##  9 METTL6    Tumor       Thyroid Gland           512 -0.337 -0.278
## 10 METTL2A   Tumor       Thyroid Gland           512 -0.322 -0.257
## # ℹ 110 more rows
m3C_relexpression_in_tumor_pvalues_heatmap <- 
  m3C_relexpression_in_tumor_pvalues  |> 
  mutate(
    minuslog10p = -log10(p),
    group = case_when(
      p < .05 & mean > 0 ~ 'A',
      p < .05 & mean < 0 ~ 'B',
      .default = 'C'
    )
  ) |> 
  ggplot(aes(
    x = gene_name |> str_remove('METTL'),
    y = reorder(primary_site_modified, mean),
    fill = group
  )) +
  geom_tile() +
  labs(x = '', y = '') +
  scale_fill_manual(values = c('red', 'blue', 'gray')) +
  theme_minimal(base_size = 8) +
  theme(
    legend.position = 'bottom', 
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = .5))
m3C_relexpression_in_tumor_pvalues_heatmap |> 
  ggsave_multiple_formats(
    outdir = fig_expression, 
    width = 4.5, height = 9, units = 'cm', fontsize = 7
  )

# m3C_writers_TCGAtargetGTEx_rel_expression_TPM |>
#   filter(sample_type == 'Tumor') |>
#   ggplot(aes(
#     x = reorder(primary_site_modified, rel_expression),
#     y = rel_expression)) +
#   geom_violin() +
#   geom_hline(yintercept = 1) +
#   scale_y_log10() +
#   coord_flip() +
#   facet_wrap( ~ gene_name)

Survival analysis

# m3C_writers_TCGA_gene_TPMs_survival <-
#   m3C_writers_TCGAtargetGTEx_gene_TPMs |>
#   right_join(TCGA_survival)
# m3C_writers_TCGA_gene_TPMs_survival

m3C_writers_TCGA_TPMs_survival <-
  m3C_writers_TCGAtargetGTEx_gene_TPMs |>
  right_join(TCGA_survival)
## Joining with `by = join_by(sample)`
m3C_writers_TCGA_TPMs_survival |> 
  export_tsv(outdir = tabledir, compression = 'gz')
## 
## Exported to: Tables/TCGA_GTEx_TPM/m3C_writers_TCGA_TPMs_survival_2025-06-10.tsv.gz
## # A tibble: 41,984 × 20
##    gene_id       gene_name sample   TPM detailed_category primary disease or t…¹
##    <chr>         <chr>     <chr>  <dbl> <chr>             <chr>                 
##  1 ENSG00000123… METTL8    TCGA-… 3.25  Glioblastoma Mul… Glioblastoma Multifor…
##  2 ENSG00000123… METTL8    TCGA-… 2.52  Brain Lower Grad… Brain Lower Grade Gli…
##  3 ENSG00000123… METTL8    TCGA-… 0.896 Liver Hepatocell… Liver Hepatocellular …
##  4 ENSG00000123… METTL8    TCGA-… 4.38  Uterine Corpus E… Uterine Corpus Endome…
##  5 ENSG00000123… METTL8    TCGA-… 2.32  Kidney Clear Cel… Kidney Clear Cell Car…
##  6 ENSG00000123… METTL8    TCGA-… 2.38  Breast Invasive … Breast Invasive Carci…
##  7 ENSG00000123… METTL8    TCGA-… 2.06  Breast Invasive … Breast Invasive Carci…
##  8 ENSG00000123… METTL8    TCGA-… 3.02  Cervical & Endoc… Cervical & Endocervic…
##  9 ENSG00000123… METTL8    TCGA-… 2.57  Lung Adenocarcin… Lung Adenocarcinoma   
## 10 ENSG00000123… METTL8    TCGA-… 2.50  Lung Adenocarcin… Lung Adenocarcinoma   
## # ℹ 41,974 more rows
## # ℹ abbreviated name: ¹​`primary disease or tissue`
## # ℹ 14 more variables: primary_site <chr>, `_sample_type` <chr>,
## #   `_gender` <chr>, `_study` <chr>, sample_type <chr>,
## #   primary_site_modified <chr>, OS <dbl>, OS.time <dbl>, DSS <dbl>,
## #   DSS.time <dbl>, DFI <dbl>, DFI.time <dbl>, PFI <dbl>, PFI.time <dbl>
temp <- 
  m3C_writers_TCGA_TPMs_survival |> 
  filter(gene_name == 'METTL2A') |>
  filter(primary_site_modified == 'Pancreas') |> 
  #group_by(primary_site_modified) |> 
  mutate(
    group = ifelse(TPM > median(TPM), 'high', 'low')
  )
temp
## # A tibble: 183 × 21
##    gene_id       gene_name sample   TPM detailed_category primary disease or t…¹
##    <chr>         <chr>     <chr>  <dbl> <chr>             <chr>                 
##  1 ENSG00000087… METTL2A   TCGA-…  3.64 Pancreatic Adeno… Pancreatic Adenocarci…
##  2 ENSG00000087… METTL2A   TCGA-…  3.14 Pancreatic Adeno… Pancreatic Adenocarci…
##  3 ENSG00000087… METTL2A   TCGA-…  2.69 Pancreatic Adeno… Pancreatic Adenocarci…
##  4 ENSG00000087… METTL2A   TCGA-…  3.74 Pancreatic Adeno… Pancreatic Adenocarci…
##  5 ENSG00000087… METTL2A   TCGA-…  2.95 Pancreatic Adeno… Pancreatic Adenocarci…
##  6 ENSG00000087… METTL2A   TCGA-…  2.46 Pancreatic Adeno… Pancreatic Adenocarci…
##  7 ENSG00000087… METTL2A   TCGA-…  3.40 Pancreatic Adeno… Pancreatic Adenocarci…
##  8 ENSG00000087… METTL2A   TCGA-…  3.19 Pancreatic Adeno… Pancreatic Adenocarci…
##  9 ENSG00000087… METTL2A   TCGA-…  2.24 Pancreatic Adeno… Pancreatic Adenocarci…
## 10 ENSG00000087… METTL2A   TCGA-…  2.71 Pancreatic Adeno… Pancreatic Adenocarci…
## # ℹ 173 more rows
## # ℹ abbreviated name: ¹​`primary disease or tissue`
## # ℹ 15 more variables: primary_site <chr>, `_sample_type` <chr>,
## #   `_gender` <chr>, `_study` <chr>, sample_type <chr>,
## #   primary_site_modified <chr>, OS <dbl>, OS.time <dbl>, DSS <dbl>,
## #   DSS.time <dbl>, DFI <dbl>, DFI.time <dbl>, PFI <dbl>, PFI.time <dbl>,
## #   group <chr>
do_coxph_2group <- function(df) {
  
  df <- df |>
    mutate(
      group = ifelse(TPM > median(TPM), 'high', 'low')
    )
  
  coxph(Surv(OS.time, OS) ~ group, df) |>
    broom::tidy()
  
}
temp |> 
  do_coxph_2group()
## # A tibble: 1 × 5
##   term     estimate std.error statistic p.value
##   <chr>       <dbl>     <dbl>     <dbl>   <dbl>
## 1 grouplow   -0.398     0.212     -1.87  0.0610
m3C_writers_TCGA_TPMs_survival_coxph_p0.05 <- 
  m3C_writers_TCGA_TPMs_survival |> 
  filter(!is.na(primary_site_modified)) |>
  group_by(gene_name, primary_site_modified) |> 
  nest() |> 
  mutate(model = map(data, do_coxph_2group)) |> 
  select(-data) |> 
  unnest(cols = model) |> 
  mutate(group = case_when(
    estimate > 0 & p.value < .05 ~ 'favorable in high',
    estimate < 0 & p.value < .05 ~ 'unfavorable in high',
    .default = 'not significant'
  )
  )
m3C_writers_TCGA_TPMs_survival_coxph_p0.05 |> 
  export_tsv(outdir = tabledir)
## 
## Exported to: Tables/TCGA_GTEx_TPM/m3C_writers_TCGA_TPMs_survival_coxph_p0.05_2025-06-10.tsv
## # A tibble: 116 × 8
## # Groups:   gene_name, primary_site_modified [116]
##    gene_name primary_site_modified term    estimate std.error statistic  p.value
##    <chr>     <chr>                 <chr>      <dbl>     <dbl>     <dbl>    <dbl>
##  1 METTL8    Brain                 groupl…  -1.28      0.137     -9.29  1.57e-20
##  2 METTL8    Liver                 groupl…  -0.139     0.158     -0.878 3.80e- 1
##  3 METTL8    Endometrium           groupl…  -0.0635    0.335     -0.190 8.50e- 1
##  4 METTL8    Kidney                groupl…  -0.147     0.124     -1.19  2.34e- 1
##  5 METTL8    Breast                groupl…   0.0201    0.142      0.141 8.88e- 1
##  6 METTL8    Cervix                groupl…  -0.218     0.236     -0.924 3.55e- 1
##  7 METTL8    Lung                  groupl…  -0.170     0.0946    -1.80  7.19e- 2
##  8 METTL8    Esophagus             groupl…   0.231     0.226      1.02  3.06e- 1
##  9 METTL8    Soft tissue,Bone      groupl…   0.245     0.202      1.21  2.25e- 1
## 10 METTL8    Colon                 groupl…   0.167     0.223      0.748 4.54e- 1
## # ℹ 106 more rows
## # ℹ 1 more variable: group <chr>
m3C_writers_TCGA_TPMs_survival_coxph_p0.1 <- 
  m3C_writers_TCGA_TPMs_survival |> 
  filter(!is.na(primary_site_modified)) |>
  group_by(gene_name, primary_site_modified) |> 
  nest() |> 
  mutate(model = map(data, do_coxph_2group)) |> 
  select(-data) |> 
  unnest(cols = model) |> 
  mutate(group = case_when(
    estimate > 0 & p.value < .1 ~ 'favorable in high',
    estimate < 0 & p.value < .1 ~ 'unfavorable in high',
    .default = 'not significant'
  )
  )
m3C_writers_TCGA_TPMs_survival_coxph_p0.1 |> 
  export_tsv(outdir = tabledir)
## 
## Exported to: Tables/TCGA_GTEx_TPM/m3C_writers_TCGA_TPMs_survival_coxph_p0.1_2025-06-10.tsv
## # A tibble: 116 × 8
## # Groups:   gene_name, primary_site_modified [116]
##    gene_name primary_site_modified term    estimate std.error statistic  p.value
##    <chr>     <chr>                 <chr>      <dbl>     <dbl>     <dbl>    <dbl>
##  1 METTL8    Brain                 groupl…  -1.28      0.137     -9.29  1.57e-20
##  2 METTL8    Liver                 groupl…  -0.139     0.158     -0.878 3.80e- 1
##  3 METTL8    Endometrium           groupl…  -0.0635    0.335     -0.190 8.50e- 1
##  4 METTL8    Kidney                groupl…  -0.147     0.124     -1.19  2.34e- 1
##  5 METTL8    Breast                groupl…   0.0201    0.142      0.141 8.88e- 1
##  6 METTL8    Cervix                groupl…  -0.218     0.236     -0.924 3.55e- 1
##  7 METTL8    Lung                  groupl…  -0.170     0.0946    -1.80  7.19e- 2
##  8 METTL8    Esophagus             groupl…   0.231     0.226      1.02  3.06e- 1
##  9 METTL8    Soft tissue,Bone      groupl…   0.245     0.202      1.21  2.25e- 1
## 10 METTL8    Colon                 groupl…   0.167     0.223      0.748 4.54e- 1
## # ℹ 106 more rows
## # ℹ 1 more variable: group <chr>
m3C_prognosis_in_tumors_heatmap_p0.1 <- 
  m3C_writers_TCGA_TPMs_survival_coxph_p0.1 |> 
  filter(gene_name != 'ALKBH1') |> 
  left_join(m3C_writers_TCGAtargetGTEx_rel_expression_TPM_summary) |>
  filter(!is.na(mean)) |>
  ggplot(aes(
    x = gene_name |> str_remove('METTL'),
    y = reorder(primary_site_modified, mean),
    fill = group
  )) +
  geom_tile() +
  scale_fill_manual(values = c('blue', 'gray', 'red')) +
  labs(x = '', y = '') +
  theme_minimal(base_size = 8) +
  theme(
    legend.position = 'bottom',
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = .5))
## Joining with `by = join_by(gene_name, primary_site_modified)`
m3C_prognosis_in_tumors_heatmap_p0.1 |> 
  ggsave_multiple_formats(
    outdir = fig_survival, 
    width = 4.5, height = 9, units = 'cm', fontsize = 7
  )

#figdir_survival <- 'Figures/TCGA/Survival/Temp/'

plot_KM('METTL2A', m3C_writers_TCGA_TPMs_survival, 'Pancreas')
## [1] "METTL2A"
##            records n.max n.start events     rmean se(rmean) median 0.95LCL
## group=high      91    91      91     59  850.1233   88.9690    593     485
## group=low       92    92      92     36 1358.8594  169.8908    738     517
##            0.95UCL
## group=high     691
## group=low       NA

plot_KM('METTL2A', m3C_writers_TCGA_TPMs_survival, 'Uterus')
## [1] "METTL2A"
##            records n.max n.start events    rmean se(rmean) median 0.95LCL
## group=high      28    28      28     15 1855.575  354.1641   1526     667
## group=low       29    29      29     20 1036.729  196.2224    550     447
##            0.95UCL
## group=high      NA
## group=low     1591

plot_KM('METTL2A', m3C_writers_TCGA_TPMs_survival, 'Ovary')
## [1] "METTL2A"
##            records n.max n.start events    rmean se(rmean) median 0.95LCL
## group=high     213   213     213    129 1639.658  100.5764   1348    1199
## group=low      212   212     212    136 1816.555  131.3525   1355    1247
##            0.95UCL
## group=high    1562
## group=low     1579

m3C_writers <- c('METTL2A', 'METTL2B', 'METTL6', 'METTL8')

m3C_writers |>
  walk(
    plot_KM,
    df = m3C_writers_TCGA_TPMs_survival,
    category = 'Pancreas'
  )
## [1] "METTL2A"
##            records n.max n.start events     rmean se(rmean) median 0.95LCL
## group=high      91    91      91     59  850.1233   88.9690    593     485
## group=low       92    92      92     36 1358.8594  169.8908    738     517
##            0.95UCL
## group=high     691
## group=low       NA
## [1] "METTL2B"
##            records n.max n.start events    rmean se(rmean) median 0.95LCL
## group=high      91    91      91     54 1007.337  126.7941    598     486
## group=low       92    92      92     41 1160.969  153.0726    634     498
##            0.95UCL
## group=high     695
## group=low       NA
## [1] "METTL6"
##            records n.max n.start events    rmean se(rmean) median 0.95LCL
## group=high      91    91      91     45 1154.815  154.6208    598     481
## group=low       92    92      92     50 1017.051  124.3911    627     532
##            0.95UCL
## group=high      NA
## group=low     1059
## [1] "METTL8"
##            records n.max n.start events     rmean se(rmean) median 0.95LCL
## group=high      91    91      91     48  797.5719  98.44362    593     518
## group=low       92    92      92     47 1235.0822 139.74232    652     481
##            0.95UCL
## group=high     732
## group=low       NA
m3C_writers |>
  walk(
    plot_KM,
    df = m3C_writers_TCGA_TPMs_survival,
    category = 'Uterus'
  )
## [1] "METTL2A"
##            records n.max n.start events    rmean se(rmean) median 0.95LCL
## group=high      28    28      28     15 1855.575  354.1641   1526     667
## group=low       29    29      29     20 1036.729  196.2224    550     447
##            0.95UCL
## group=high      NA
## group=low     1591
## [1] "METTL2B"
##            records n.max n.start events    rmean se(rmean) median 0.95LCL
## group=high      28    28      28     17 1693.765  347.0274    810     611
## group=low       29    29      29     18 1196.591  197.4359    911     481
##            0.95UCL
## group=high      NA
## group=low       NA
## [1] "METTL6"
##            records n.max n.start events    rmean se(rmean) median 0.95LCL
## group=high      28    28      28     17 1552.116  342.0106    810     378
## group=low       29    29      29     18 1222.135  198.5799    771     541
##            0.95UCL
## group=high      NA
## group=low       NA
## [1] "METTL8"
##            records n.max n.start events    rmean se(rmean) median 0.95LCL
## group=high      28    28      28     20 1351.833  274.2731    771     597
## group=low       29    29      29     15 1457.497  229.8042    911     481
##            0.95UCL
## group=high    2043
## group=low       NA
m3C_writers |>
  walk(
    plot_KM,
    df = m3C_writers_TCGA_TPMs_survival,
    category = 'Ovary'
  )
## [1] "METTL2A"
##            records n.max n.start events    rmean se(rmean) median 0.95LCL
## group=high     213   213     213    129 1639.658  100.5764   1348    1199
## group=low      212   212     212    136 1816.555  131.3525   1355    1247
##            0.95UCL
## group=high    1562
## group=low     1579
## [1] "METTL2B"
##            records n.max n.start events    rmean se(rmean) median 0.95LCL
## group=high     213   213     213    129 1628.579  116.0621   1266    1114
## group=low      212   212     212    136 1887.828  130.9679   1399    1319
##            0.95UCL
## group=high    1470
## group=low     1680
## [1] "METTL6"
##            records n.max n.start events    rmean se(rmean) median 0.95LCL
## group=high     213   213     213    125 1674.182  115.8817   1324    1187
## group=low      212   212     212    140 1789.765  130.0695   1446    1249
##            0.95UCL
## group=high    1470
## group=low     1688
## [1] "METTL8"
##            records n.max n.start events    rmean se(rmean) median 0.95LCL
## group=high     213   213     213    122 1852.216  131.4509   1442    1249
## group=low      212   212     212    143 1651.620  118.4622   1348    1157
##            0.95UCL
## group=high    1646
## group=low     1451
m3C_writers |>
  walk(
    plot_KM,
    df = m3C_writers_TCGA_TPMs_survival,
    category = 'Lung'
  )
## [1] "METTL2A"
##            records n.max n.start events    rmean se(rmean) median 0.95LCL
## group=high     551   551     551    226 2215.707  173.9406   1492    1315
## group=low      556   556     556    225 2616.360  210.8274   1632    1268
##            0.95UCL
## group=high    1736
## group=low     1912
## [1] "METTL2B"
##            records n.max n.start events    rmean se(rmean) median 0.95LCL
## group=high     551   551     551    218 2251.161  225.0100   1501    1315
## group=low      556   556     556    233 2453.740  199.5207   1531    1268
##            0.95UCL
## group=high    1798
## group=low     1912
## [1] "METTL6"
##            records n.max n.start events    rmean se(rmean) median 0.95LCL
## group=high     553   553     553    233 2026.737  122.3304   1485    1265
## group=low      554   554     554    218 2691.553  208.8799   1656    1344
##            0.95UCL
## group=high    1713
## group=low     1953
## [1] "METTL8"
##            records n.max n.start events    rmean se(rmean) median 0.95LCL
## group=high     550   550     550    244 2175.849  177.1018   1426    1171
## group=low      557   557     557    207 2700.092  234.0303   1600    1379
##            0.95UCL
## group=high    1856
## group=low     1912
m3C_writers |>
  walk(
    plot_KM,
    df = m3C_writers_TCGA_TPMs_survival,
    category = 'Stomach'
  )
## [1] "METTL2A"
##            records n.max n.start events    rmean se(rmean) median 0.95LCL
## group=high     222   222     222     81 1509.318  219.2083   1043     801
## group=low      221   221     221     86 1710.898  171.6505    794     607
##            0.95UCL
## group=high      NA
## group=low       NA
## [1] "METTL2B"
##            records n.max n.start events    rmean se(rmean) median 0.95LCL
## group=high     221   221     221     85 1590.843  199.3109   1043     766
## group=low      222   222     222     82 1736.074  179.6754    940     669
##            0.95UCL
## group=high      NA
## group=low       NA
## [1] "METTL6"
##            records n.max n.start events    rmean se(rmean) median 0.95LCL
## group=high     223   223     223     96 1390.860  221.9040    675     570
## group=low      220   220     220     71 1856.085  184.1478   1407    1095
##            0.95UCL
## group=high      NA
## group=low       NA
## [1] "METTL8"
##            records n.max n.start events    rmean se(rmean) median 0.95LCL
## group=high     222   222     222     82 1619.424  210.1889   1043     675
## group=low      221   221     221     85 1668.891  183.9334    881     762
##            0.95UCL
## group=high      NA
## group=low       NA

Expression in tumor and normal

#

m3C_writers_TCGAtargetGTEx_TPMs_Pancreas <- 
  m3C_writers_TCGAtargetGTEx_gene_TPMs |> 
  filter(primary_site_modified == 'Pancreas')
m3C_writers_TCGAtargetGTEx_TPMs_Pancreas |> 
  export_tsv(outdir = tabledir)
## 
## Exported to: Tables/TCGA_GTEx_TPM/m3C_writers_TCGAtargetGTEx_TPMs_Pancreas_2025-06-10.tsv
## # A tibble: 1,400 × 12
##    gene_id      gene_name sample    TPM detailed_category primary disease or t…¹
##    <chr>        <chr>     <chr>   <dbl> <chr>             <chr>                 
##  1 ENSG0000012… METTL8    GTEX-…  0.178 Pancreas          Pancreas              
##  2 ENSG0000012… METTL8    GTEX-… -0.533 Pancreas          Pancreas              
##  3 ENSG0000012… METTL8    TCGA-…  2.85  Pancreatic Adeno… Pancreatic Adenocarci…
##  4 ENSG0000012… METTL8    GTEX-…  0.741 Pancreas          Pancreas              
##  5 ENSG0000012… METTL8    GTEX-… -1.06  Pancreas          Pancreas              
##  6 ENSG0000012… METTL8    GTEX-… -0.375 Pancreas          Pancreas              
##  7 ENSG0000012… METTL8    TCGA-…  2.19  Pancreatic Adeno… Pancreatic Adenocarci…
##  8 ENSG0000012… METTL8    TCGA-…  2.07  Pancreatic Adeno… Pancreatic Adenocarci…
##  9 ENSG0000012… METTL8    TCGA-…  2.99  Pancreatic Adeno… Pancreatic Adenocarci…
## 10 ENSG0000012… METTL8    TCGA-…  2.39  Pancreatic Adeno… Pancreatic Adenocarci…
## # ℹ 1,390 more rows
## # ℹ abbreviated name: ¹​`primary disease or tissue`
## # ℹ 6 more variables: primary_site <chr>, `_sample_type` <chr>,
## #   `_gender` <chr>, `_study` <chr>, sample_type <chr>,
## #   primary_site_modified <chr>
m3C_writers_TCGAtargetGTEx_TPMs_Pancreas |> 
  filter(primary_site_modified == 'Pancreas') |> 
  group_by(gene_name) |> 
  rstatix::wilcox_test(TPM ~ sample_type)
## # A tibble: 4 × 8
##   gene_name .y.   group1 group2    n1    n2 statistic        p
## * <chr>     <chr> <chr>  <chr>  <int> <int>     <dbl>    <dbl>
## 1 METTL2A   TPM   Normal Tumor    171   179     1015  1.59e-51
## 2 METTL2B   TPM   Normal Tumor    171   179     1235  5.25e-50
## 3 METTL6    TPM   Normal Tumor    171   179      854. 1.19e-52
## 4 METTL8    TPM   Normal Tumor    171   179     1415  8.82e-49
m3C_writers_expression_pancreas_violin <- 
  m3C_writers_TCGAtargetGTEx_TPMs_Pancreas |> 
  filter(primary_site_modified == 'Pancreas') |> 
  ggplot(aes(
    x = sample_type |> str_remove('ormal|umor'), 
    y = TPM, 
    fill = sample_type, 
    #colour = sample_type
  )) +
  geom_violin(lwd = .1) +
  geom_boxplot(width = .1, fill = 'white', coef = Inf, lwd = .1) +
  #ggforce::geom_sina(size = .2) +
  facet_wrap( ~ gene_name, scales = 'free_x', nrow = 1) +
  #scale_x_discrete(guide = ggh4x::guide_axis_nested(delim = '.') ) +
  #scale_y_continuous(limits = c(0, 13)) +
  scale_fill_manual(values = c('blue', 'red')) +
  labs(x = '', y = 'log2 (normalized count + 1)') +
  #coord_flip() +
  theme_classic(base_size = 7) +
  theme(
    legend.position = 'bottom',
  #  axis.text.x = element_text(angle = 90, vjust = .5, hjust = 1)
  )
m3C_writers_expression_pancreas_violin |> 
  ggsave_multiple_formats(
    outdir = fig_expression,
    width = 7, height = 4, fontsize = 7
  )

Sesison info

sessioninfo::session_info()
## ─ Session info ───────────────────────────────────────────────────────────────
##  setting  value
##  version  R version 4.4.1 (2024-06-14)
##  os       macOS 15.4.1
##  system   aarch64, darwin20
##  ui       X11
##  language (EN)
##  collate  en_US.UTF-8
##  ctype    en_US.UTF-8
##  tz       Asia/Tokyo
##  date     2025-06-10
##  pandoc   3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)
## 
## ─ Packages ───────────────────────────────────────────────────────────────────
##  ! package      * version    date (UTC) lib source
##    abind          1.4-8      2024-09-12 [1] CRAN (R 4.4.1)
##    backports      1.5.0      2024-05-23 [1] CRAN (R 4.4.0)
##    bit            4.5.0      2024-09-20 [1] CRAN (R 4.4.1)
##    bit64          4.5.2      2024-09-22 [1] CRAN (R 4.4.1)
##    broom          1.0.7      2024-09-26 [1] CRAN (R 4.4.1)
##    bslib          0.8.0      2024-07-29 [1] CRAN (R 4.4.0)
##    cachem         1.1.0      2024-05-16 [1] CRAN (R 4.4.0)
##    car            3.1-3      2024-09-27 [1] CRAN (R 4.4.1)
##    carData        3.0-5      2022-01-06 [1] CRAN (R 4.4.0)
##    class          7.3-22     2023-05-03 [1] CRAN (R 4.4.1)
##    cli            3.6.3      2024-06-21 [1] CRAN (R 4.4.0)
##    codetools      0.2-20     2024-03-31 [1] CRAN (R 4.4.1)
##    colorspace     2.1-1      2024-07-26 [1] CRAN (R 4.4.0)
##    crayon         1.5.3      2024-06-20 [1] CRAN (R 4.4.0)
##    curl           5.2.3      2024-09-20 [1] CRAN (R 4.4.1)
##    data.table     1.16.2     2024-10-10 [1] CRAN (R 4.4.1)
##    desc           1.4.3      2023-12-10 [1] CRAN (R 4.4.0)
##    devtools       2.4.5      2022-10-11 [1] CRAN (R 4.4.0)
##    dials          1.3.0      2024-07-30 [1] CRAN (R 4.4.0)
##    DiceDesign     1.10       2023-12-07 [1] CRAN (R 4.4.0)
##    digest         0.6.37     2024-08-19 [1] CRAN (R 4.4.1)
##    dplyr        * 1.1.4      2023-11-17 [1] CRAN (R 4.4.0)
##    ellipsis       0.3.2      2021-04-29 [1] CRAN (R 4.4.0)
##    evaluate       1.0.1      2024-10-10 [1] CRAN (R 4.4.1)
##    fansi          1.0.6      2023-12-08 [1] CRAN (R 4.4.0)
##    farver         2.1.2      2024-05-13 [1] CRAN (R 4.4.0)
##    fastmap        1.2.0      2024-05-15 [1] CRAN (R 4.4.0)
##    forcats      * 1.0.0      2023-01-29 [1] CRAN (R 4.4.0)
##    foreach        1.5.2      2022-02-02 [1] CRAN (R 4.4.0)
##    Formula        1.2-5      2023-02-24 [1] CRAN (R 4.4.0)
##    fs             1.6.4      2024-04-25 [1] CRAN (R 4.4.0)
##    furrr          0.3.1      2022-08-15 [1] CRAN (R 4.4.0)
##    future         1.34.0     2024-07-29 [1] CRAN (R 4.4.0)
##    future.apply   1.11.3     2024-10-27 [1] CRAN (R 4.4.1)
##    generics       0.1.3      2022-07-05 [1] CRAN (R 4.4.0)
##    ggforce        0.4.2      2024-02-19 [1] CRAN (R 4.4.0)
##    ggplot2      * 3.5.1      2024-04-23 [1] CRAN (R 4.4.0)
##    ggpubr       * 0.6.0      2023-02-10 [1] CRAN (R 4.4.0)
##    ggrepel        0.9.6      2024-09-07 [1] CRAN (R 4.4.1)
##    ggsignif       0.6.4      2022-10-13 [1] CRAN (R 4.4.0)
##    globals        0.16.3     2024-03-08 [1] CRAN (R 4.4.0)
##    glue           1.8.0      2024-09-30 [1] CRAN (R 4.4.1)
##    gower          1.0.1      2022-12-22 [1] CRAN (R 4.4.0)
##    GPfit          1.0-8      2019-02-08 [1] CRAN (R 4.4.0)
##    gprofiler2     0.2.3      2024-02-23 [1] CRAN (R 4.4.0)
##    gridExtra      2.3        2017-09-09 [1] CRAN (R 4.4.0)
##    gtable         0.3.6      2024-10-25 [1] CRAN (R 4.4.1)
##    hardhat        1.4.0      2024-06-02 [1] CRAN (R 4.4.0)
##    highr          0.11       2024-05-26 [1] CRAN (R 4.4.0)
##    hms            1.1.3      2023-03-21 [1] CRAN (R 4.4.0)
##    htmltools      0.5.8.1    2024-04-04 [1] CRAN (R 4.4.0)
##    htmlwidgets    1.6.4      2023-12-06 [1] CRAN (R 4.4.0)
##    httpuv         1.6.15     2024-03-26 [1] CRAN (R 4.4.0)
##    httr           1.4.7      2023-08-15 [1] CRAN (R 4.4.0)
##    ipred          0.9-15     2024-07-18 [1] CRAN (R 4.4.0)
##    iterators      1.0.14     2022-02-05 [1] CRAN (R 4.4.0)
##    jquerylib      0.1.4      2021-04-26 [1] CRAN (R 4.4.0)
##    jsonlite       1.8.9      2024-09-20 [1] CRAN (R 4.4.1)
##    km.ci          0.5-6      2022-04-06 [1] CRAN (R 4.4.0)
##    KMsurv         0.1-5      2012-12-03 [1] CRAN (R 4.4.0)
##    knitr          1.48       2024-07-07 [1] CRAN (R 4.4.0)
##    labeling       0.4.3      2023-08-29 [1] CRAN (R 4.4.0)
##    later          1.3.2      2023-12-06 [1] CRAN (R 4.4.0)
##    lattice        0.22-6     2024-03-20 [1] CRAN (R 4.4.1)
##    lava           1.8.0      2024-03-05 [1] CRAN (R 4.4.0)
##    lazyeval       0.2.2      2019-03-15 [1] CRAN (R 4.4.0)
##    lhs            1.2.0      2024-06-30 [1] CRAN (R 4.4.0)
##    lifecycle      1.0.4      2023-11-07 [1] CRAN (R 4.4.0)
##    listenv        0.9.1      2024-01-29 [1] CRAN (R 4.4.0)
##    lubridate    * 1.9.3      2023-09-27 [1] CRAN (R 4.4.0)
##    magrittr       2.0.3      2022-03-30 [1] CRAN (R 4.4.0)
##    MASS           7.3-61     2024-06-13 [1] CRAN (R 4.4.0)
##    Matrix         1.7-1      2024-10-18 [1] CRAN (R 4.4.1)
##    memoise        2.0.1      2021-11-26 [1] CRAN (R 4.4.0)
##    mime           0.12       2021-09-28 [1] CRAN (R 4.4.0)
##    miniUI         0.1.1.1    2018-05-18 [1] CRAN (R 4.4.0)
##    munsell        0.5.1      2024-04-01 [1] CRAN (R 4.4.0)
##  R myUtilities  * 0.0.0.9000 <NA>       [?] <NA>
##    nnet           7.3-19     2023-05-03 [1] CRAN (R 4.4.1)
##    parallelly     1.38.0     2024-07-27 [1] CRAN (R 4.4.0)
##    parsnip        1.2.1      2024-03-22 [1] CRAN (R 4.4.0)
##    pillar         1.9.0      2023-03-22 [1] CRAN (R 4.4.0)
##    pkgbuild       1.4.5      2024-10-28 [1] CRAN (R 4.4.1)
##    pkgconfig      2.0.3      2019-09-22 [1] CRAN (R 4.4.0)
##    pkgload        1.4.0      2024-06-28 [1] CRAN (R 4.4.0)
##    plotly         4.10.4     2024-01-13 [1] CRAN (R 4.4.0)
##    polyclip       1.10-7     2024-07-23 [1] CRAN (R 4.4.0)
##    prodlim        2024.06.25 2024-06-24 [1] CRAN (R 4.4.0)
##    profvis        0.4.0      2024-09-20 [1] CRAN (R 4.4.1)
##    promises       1.3.0      2024-04-05 [1] CRAN (R 4.4.0)
##    purrr        * 1.0.2      2023-08-10 [1] CRAN (R 4.4.0)
##    R6             2.5.1      2021-08-19 [1] CRAN (R 4.4.0)
##    ragg           1.3.3      2024-09-11 [1] CRAN (R 4.4.1)
##    Rcpp           1.0.13     2024-07-17 [1] CRAN (R 4.4.0)
##    readr        * 2.1.5      2024-01-10 [1] CRAN (R 4.4.0)
##    recipes        1.1.0      2024-07-04 [1] CRAN (R 4.4.0)
##    remotes        2.5.0      2024-03-17 [1] CRAN (R 4.4.0)
##    rlang          1.1.4      2024-06-04 [1] CRAN (R 4.4.0)
##    rmarkdown      2.28       2024-08-17 [1] CRAN (R 4.4.0)
##    rpart          4.1.23     2023-12-05 [1] CRAN (R 4.4.1)
##    rprojroot      2.0.4      2023-11-05 [1] CRAN (R 4.4.0)
##    rsample        1.2.1      2024-03-25 [1] CRAN (R 4.4.0)
##    rstatix        0.7.2      2023-02-01 [1] CRAN (R 4.4.0)
##    rstudioapi     0.17.1     2024-10-22 [1] CRAN (R 4.4.1)
##    sass           0.4.9      2024-03-15 [1] CRAN (R 4.4.0)
##    scales         1.3.0      2023-11-28 [1] CRAN (R 4.4.0)
##    sessioninfo    1.2.2      2021-12-06 [1] CRAN (R 4.4.0)
##    shiny          1.9.1      2024-08-01 [1] CRAN (R 4.4.0)
##    stringi        1.8.4      2024-05-06 [1] CRAN (R 4.4.0)
##    stringr      * 1.5.1      2023-11-14 [1] CRAN (R 4.4.0)
##    survival     * 3.7-0      2024-06-05 [1] CRAN (R 4.4.0)
##    survminer    * 0.5.0      2024-10-30 [1] CRAN (R 4.4.1)
##    survMisc       0.5.6      2022-04-07 [1] CRAN (R 4.4.0)
##    svglite        2.1.3      2023-12-08 [1] CRAN (R 4.4.0)
##    systemfonts    1.1.0      2024-05-15 [1] CRAN (R 4.4.0)
##    textshaping    0.4.0      2024-05-24 [1] CRAN (R 4.4.0)
##    tibble       * 3.2.1      2023-03-20 [1] CRAN (R 4.4.0)
##    tidyr        * 1.3.1      2024-01-24 [1] CRAN (R 4.4.0)
##    tidyselect     1.2.1      2024-03-11 [1] CRAN (R 4.4.0)
##    tidyverse    * 2.0.0      2023-02-22 [1] CRAN (R 4.4.0)
##    timechange     0.3.0      2024-01-18 [1] CRAN (R 4.4.0)
##    timeDate       4041.110   2024-09-22 [1] CRAN (R 4.4.1)
##    tune           1.2.1      2024-04-18 [1] CRAN (R 4.4.0)
##    tweenr         2.0.3      2024-02-26 [1] CRAN (R 4.4.0)
##    tzdb           0.4.0      2023-05-12 [1] CRAN (R 4.4.0)
##    urlchecker     1.0.1      2021-11-30 [1] CRAN (R 4.4.0)
##    usethis        3.0.0      2024-07-29 [1] CRAN (R 4.4.0)
##    utf8           1.2.4      2023-10-22 [1] CRAN (R 4.4.0)
##    vctrs          0.6.5      2023-12-01 [1] CRAN (R 4.4.0)
##    viridisLite    0.4.2      2023-05-02 [1] CRAN (R 4.4.0)
##    vroom          1.6.5      2023-12-05 [1] CRAN (R 4.4.0)
##    withr          3.0.2      2024-10-28 [1] CRAN (R 4.4.1)
##    workflows      1.1.4      2024-02-19 [1] CRAN (R 4.4.0)
##    xfun           0.48       2024-10-03 [1] CRAN (R 4.4.1)
##    xtable         1.8-4      2019-04-21 [1] CRAN (R 4.4.0)
##    yaml           2.3.10     2024-07-26 [1] CRAN (R 4.4.0)
##    yardstick      1.3.1      2024-03-21 [1] CRAN (R 4.4.0)
##    zoo            1.8-12     2023-04-13 [1] CRAN (R 4.4.0)
## 
##  [1] /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library
## 
##  R ── Package was removed from disk.
## 
## ──────────────────────────────────────────────────────────────────────────────